This file is used to generate a dataset containing all individual datasets, without melanocytes.
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
save_name = "hs_hd"
out_dir = "."
n_threads = 5 # for tSNE
We load the sample information :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_sample_info.rds"))
project_names_oi = sample_info$project_name
graphics::pie(rep(1, nrow(sample_info)),
col = sample_info$color,
labels = sample_info$project_name)
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## T_CD4 T_CD8 mac_CPVL mac_TREM2 FC CO ME IBL
## 13 13 7 10 10 15 10 15
## IF mORS HFSC_1 HFSC_2 mela
## 22 14 17 18 10
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $T_CD4
## [1] "CD3E" "CD4"
##
## $T_CD8
## [1] "CD3E" "CD8A"
##
## $mac_CPVL
## [1] "AIF1" "CPVL"
##
## $mac_TREM2
## [1] "AIF1" "TREM2"
##
## $FC
## [1] "CD3E" "CD8A"
##
## $CO
## [1] "KRT35" "KRT85"
##
## $ME
## [1] "BAMBI" "SLC7A8"
##
## $IBL
## [1] "KRT16" "KRT6B"
##
## $IF
## [1] "AQP3" "S100A8"
##
## $mORS
## [1] "IMPA2" "KRT6A"
##
## $HFSC_1
## [1] "DIO2" "TGFB2"
##
## $HFSC_2
## [1] "DIO2" "BHLHE41"
##
## $mela
## [1] "DCT" "MLANA"
For each sample, we :
We load individual datasets :
sobj_list = lapply(project_names_oi, FUN = function(one_project_name) {
subsobj = readRDS(paste0(out_dir, "/../2_individual/datasets/",
one_project_name, "_sobj_filtered.rds"))
return(subsobj)
})
names(sobj_list) = project_names_oi
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## 2021_31 27955 1043
## 2021_36 27955 602
## 2021_41 27955 2256
## 2022_03 27955 3977
## 2022_14 27955 2588
## 2022_01 27955 1213
## 2022_02 27955 2286
## 195685 13965
We represent cells in the tSNE :
name2D = "RNA_pca_20_tsne"
We look at cell type annotation for each dataset :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "cell_type",
reduction = name2D) +
ggplot2::scale_color_manual(values = color_markers,
breaks = names(color_markers),
name = "Cell Type") +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
and clustering :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "seurat_clusters",
reduction = name2D, label = TRUE) +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
For each individual dataset, we remove melanocytes. First, we smooth cell type annotation at a cluster level :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
cluster_type = table(one_sobj$cell_type, one_sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(one_sobj$cell_type)[cluster_type])
one_sobj$cluster_type = cluster_type[one_sobj$seurat_clusters]
## Output
return(one_sobj)
})
To locate melanocytes, we look at their score, cell type annotation, and clustering.
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
project_name = as.character(unique(one_sobj$project_name))
plot_sublist = list()
# Score
plot_sublist[[1]] = Seurat::FeaturePlot(one_sobj, reduction = name2D,
features = "score_mela") +
ggplot2::labs(title = project_name,
subtitle = "Melanocytes score") +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5))
# Cell type
plot_sublist[[2]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cell_type",
order = "melanocytes") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("mela", setdiff(names(color_markers), "mela"))) +
ggplot2::labs(title = "Cell type annotation",
subtitle = paste0(sum(one_sobj$cell_type == "mela"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Clusters
plot_sublist[[3]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "seurat_clusters",
label = TRUE) +
ggplot2::labs(title = "Clusters") +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Cluster type
plot_sublist[[4]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cluster_type") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("mela", setdiff(names(color_markers), "mela"))) +
ggplot2::labs(title = "Cluster annotation",
subtitle = paste0(sum(one_sobj$cluster_type == "mela"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(plot_sublist)
}) %>% unlist(., recursive = FALSE)
patchwork::wrap_plots(plot_list, ncol = 4)
We remove melanocytes based on cluster annotation :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
one_sobj$is_of_interest = (one_sobj$cluster_type != "mela")
if (sum(one_sobj$is_of_interest) > 0) {
one_sobj = subset(one_sobj, is_of_interest == TRUE)
} else {
one_sobj = NA
}
one_sobj$is_of_interest = NULL
return(one_sobj)
})
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## 2021_31 27955 885
## 2021_36 27955 528
## 2021_41 27955 1982
## 2022_03 27955 3457
## 2022_14 27955 2422
## 2022_01 27955 835
## 2022_02 27955 2002
## 195685 12111
We remove melanocytes from annotation :
cell_markers = cell_markers[names(cell_markers) != "mela"]
color_markers = color_markers[names(color_markers) != "mela"]
dotplot_markers = dotplot_markers[names(dotplot_markers) != "mela"]
We re-annote cells for cell type, since melanocytes have been removed :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
# Remove old annotation
one_sobj@meta.data[, grep(colnames(one_sobj@meta.data), pattern = "score", value = TRUE)] = NULL
# Re-annot
one_sobj = aquarius::cell_annot_custom(one_sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = FALSE,
verbose = TRUE)
# Set factor levels
one_sobj$cell_type = factor(one_sobj$cell_type, levels = names(cell_markers))
return(one_sobj)
})
We combine all datasets :
sobj = base::merge(sobj_list[[1]],
y = sobj_list[c(2:length(sobj_list))],
add.cell.ids = names(sobj_list))
sobj
## An object of class Seurat
## 27955 features across 12111 samples within 1 assay
## Active assay: RNA (27955 features, 0 variable features)
We add again the correspondence between gene names and gene ID. Since all datasets have been aligned using the same transcriptome, we take the correspondence from one individual dataset.
sobj@assays$RNA@meta.features = sobj_list[[1]]@assays$RNA@meta.features[, c("Ensembl_ID", "gene_name")]
head(sobj@assays$RNA@meta.features)
## Ensembl_ID gene_name
## MIR1302-2HG ENSG00000243485 MIR1302-2HG
## FAM138A ENSG00000237613 FAM138A
## OR4F5 ENSG00000186092 OR4F5
## AL627309.1 ENSG00000238009 AL627309.1
## AL627309.3 ENSG00000239945 AL627309.3
## AL627309.4 ENSG00000241599 AL627309.4
We remove the list of objects :
rm(sobj_list)
We keep a subset of meta.data and reset levels :
sobj@meta.data = sobj@meta.data[, c("orig.ident", "nCount_RNA", "nFeature_RNA", "log_nCount_RNA",
"project_name", "sample_identifier", "sample_type",
"Seurat.Phase", "cyclone.Phase", "percent.mt", "percent.rb",
"cell_type")]
sobj$orig.ident = factor(sobj$orig.ident, levels = levels(sample_info$project_name))
sobj$project_name = factor(sobj$project_name, levels = levels(sample_info$project_name))
sobj$sample_identifier = factor(sobj$sample_identifier, levels = levels(sample_info$sample_identifier))
sobj$sample_type = factor(sobj$sample_type, levels = levels(sample_info$sample_type))
sobj$cell_type = factor(sobj$cell_type, levels = names(color_markers))
summary(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA project_name
## 2021_31: 885 Min. : 658 Min. : 500 Min. : 6.489 2021_31: 885
## 2021_36: 528 1st Qu.: 4398 1st Qu.:1574 1st Qu.: 8.389 2021_36: 528
## 2021_41:1982 Median : 11674 Median :3043 Median : 9.365 2021_41:1982
## 2022_03:3457 Mean : 15151 Mean :3082 Mean : 9.181 2022_03:3457
## 2022_14:2422 3rd Qu.: 22206 3rd Qu.:4394 3rd Qu.:10.008 2022_14:2422
## 2022_01: 835 Max. :139803 Max. :7942 Max. :11.848 2022_01: 835
## 2022_02:2002 2022_02:2002
## sample_identifier sample_type Seurat.Phase cyclone.Phase
## HS_1: 885 HS:9274 Length:12111 Length:12111
## HS_2: 528 HD:2837 Class :character Class :character
## HS_3:1982 Mode :character Mode :character
## HS_4:3457
## HS_5:2422
## HD_1: 835
## HD_2:2002
## percent.mt percent.rb cell_type
## Min. : 0.000 Min. : 0.4948 IF :2393
## 1st Qu.: 2.902 1st Qu.:20.0623 CO :1774
## Median : 4.710 Median :24.7964 IBL :1721
## Mean : 5.025 Mean :24.1542 FC :1157
## 3rd Qu.: 6.512 3rd Qu.:29.2363 T_CD4 : 897
## Max. :19.954 Max. :46.0169 HFSC_1 : 790
## (Other):3379
We remove genes that are expressed in less than 5 cells :
sobj = aquarius::filter_features(sobj, min_cells = 5)
## [1] 27955 12111
## [1] 20003 12111
sobj
## An object of class Seurat
## 20003 features across 12111 samples within 1 assay
## Active assay: RNA (20003 features, 0 variable features)
How many cells by sample ?
table(sobj$project_name)
##
## 2021_31 2021_36 2021_41 2022_03 2022_14 2022_01 2022_02
## 885 528 1982 3457 2422 835 2002
We represent this information as a barplot :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_fill()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
This is the same barplot with another position :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_stack()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
We normalize the count matrix for remaining cells and select highly variable features :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize")
sobj = Seurat::FindVariableFeatures(sobj, nfeatures = 2000)
sobj = Seurat::ScaleData(sobj)
sobj
## An object of class Seurat
## 20003 features across 12111 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
We perform a PCA :
sobj = Seurat::RunPCA(sobj,
assay = "RNA",
reduction.name = "RNA_pca",
npcs = 100,
seed.use = 1337L)
sobj
## An object of class Seurat
## 20003 features across 12111 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
## 1 dimensional reduction calculated: RNA_pca
We choose the number of dimensions such that they summarize 60 % of the variability :
stdev = sobj@reductions[["RNA_pca"]]@stdev
stdev_prop = cumsum(stdev)/sum(stdev)
ndims = which(stdev_prop > 0.60)[1]
ndims
## [1] 38
We can visualize this on the elbow plot :
elbow_p = Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca") +
ggplot2::geom_point(x = ndims, y = stdev[ndims], col = "red")
x_text = ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>% as.numeric()
elbow_p = elbow_p +
ggplot2::scale_x_continuous(breaks = sort(c(x_text, ndims)), limits = c(0, 100))
x_color = ifelse(ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>%
as.numeric() %>% round(., 2) == round(ndims, 2), "red", "black")
elbow_p = elbow_p +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_text(color = x_color))
elbow_p
We generate a tSNE and a UMAP with 38 principal components :
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
(Time to run : 37.63 s)
We can visualize the two representations :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We remove sample specific effect on the pca using Harmony :
`%||%` = function(lhs, rhs) {
if (!is.null(x = lhs)) {
return(lhs)
} else {
return(rhs)
}
}
set.seed(1337L)
sobj = harmony::RunHarmony(object = sobj,
group.by.vars = "project_name",
plot_convergence = TRUE,
reduction = "RNA_pca",
assay.use = "RNA",
reduction.save = "harmony",
max.iter.harmony = 50,
project.dim = FALSE)
(Time
to run : 63.26 s)
From this batch-effect removed projection, we generate a tSNE and a UMAP.
sobj = Seurat::RunUMAP(sobj,
seed.use = 1337L,
dims = 1:ndims,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_umap"),
reduction.key = paste0("harmony_", ndims, "umap_"))
sobj = Seurat::RunTSNE(sobj,
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_tsne"),
reduction.key = paste0("harmony", ndims, "tsne_"))
(Time to run : 39.48 s)
We visualize the corrected projections :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We will keep the tSNE from harmony :
reduction = "harmony"
name2D = paste0("harmony_", ndims, "_tsne")
We generate a clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = reduction, dims = 1:ndims)
sobj = Seurat::FindClusters(sobj, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12111
## Number of edges: 475472
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9021
## Number of communities: 27
## Elapsed time: 1 seconds
clusters_plot = Seurat::DimPlot(sobj, reduction = name2D, label = TRUE) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::labs(title = "Clusters ID") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
clusters_plot
We represent the 4 quality metrics :
plot_list = Seurat::FeaturePlot(sobj, reduction = name2D,
combine = FALSE, pt.size = 0.25,
features = c("percent.mt", "percent.rb", "log_nCount_RNA", "nFeature_RNA"))
plot_list = lapply(plot_list, FUN = function(one_plot) {
one_plot +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1)
})
patchwork::wrap_plots(plot_list, nrow = 1)
We can represent clusters, split by sample of origin :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
group_by = "seurat_clusters",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_color = aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))))
plot_list[[length(plot_list) + 1]] = clusters_plot +
ggplot2::labs(title = "Cluster ID") &
ggplot2::theme(plot.title = element_text(hjust = 0.5, size = 15))
patchwork::wrap_plots(plot_list, ncol = 4) &
Seurat::NoLegend()
We make a heatmap to compare the representativness of cells for each sample, within each cluster :
cluster_by_sample = table(sobj$sample_identifier,
sobj@meta.data[, "seurat_clusters"]) %>%
prop.table(margin = 1) %>%
as.matrix()
## Representative markers
cluster_markers = c("PTPRC", "CD3E", "AIF1", "MSX2", "DIO2", "GPX2", "KRT16")
ht_annot = Seurat::GetAssayData(sobj, assay = "RNA", slot = "data")[cluster_markers, ] %>%
Matrix::t() %>% as.data.frame()
ht_annot$clusters = sobj@meta.data[, "seurat_clusters"]
ht_annot = ht_annot %>%
dplyr::group_by(clusters) %>%
dplyr::summarise_all(funs('mean' = mean)) %>%
as.data.frame() %>%
dplyr::select(-clusters) %>%
`colnames<-`(c(cluster_markers))
color_fun = function(one_gene) {
gene_range = range(ht_annot[, one_gene])
gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
breaks = seq(from = gene_range[1], to = gene_range[2],
length.out = length(aquarius::color_gene)))
return(gene_palette)
}
## Bottom annotation : average gene expression
ha_bottom = ComplexHeatmap::HeatmapAnnotation(df = ht_annot,
which = "column",
show_legend = FALSE,
col = setNames(nm = cluster_markers,
lapply(cluster_markers, FUN = color_fun)),
annotation_name_side = "left")
## Right annotation : number of cells by dataset
ht_annot = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("sample_identifier", "nb_cells")) %>%
`rownames<-`(.$sample_identifier) %>%
dplyr::select(-sample_identifier)
ha_right = ComplexHeatmap::HeatmapAnnotation(
df = ht_annot,
which = "row",
show_legend = TRUE,
annotation_name_side = "top",
col = list(nb_cells = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
breaks = seq(from = range(ht_annot$nb_cells)[1],
to = range(ht_annot$nb_cells)[2],
length.out = 9))))
## Top annotation : main cell type in this cluster
ht_annot = table(sobj$sample_identifier,
sobj@meta.data[, "seurat_clusters"]) %>%
prop.table(margin = 1) %>%
as.matrix()
ht_annot = table(sobj$cell_type,
sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
ht_annot = data.frame(row.names = names(ht_annot),
cell_type = names(color_markers)[ht_annot])
ha_top = ComplexHeatmap::HeatmapAnnotation(
df = ht_annot,
which = "column",
show_legend = TRUE,
annotation_name_side = "left",
col = list(cell_type = color_markers))
## Assemble heatmap
ht = ComplexHeatmap::Heatmap(cluster_by_sample,
heatmap_legend_param = list(title = "Proportion",
col = c("#2166AC", "#F7F7F7", "#B2182B")),
bottom_annotation = ha_bottom,
right_annotation = ha_right,
top_annotation = ha_top,
cluster_rows = TRUE,
cluster_columns = TRUE,
row_title = "Sample",
row_names_gp = grid::gpar(names = sample_info$sample_identifier,
col = sample_info$color,
fontface = "bold"),
column_title = "Cluster",
row_names_side = "left",
column_names_side = "top",
column_names_rot = 0)
## Draw !
ComplexHeatmap::draw(ht, merge_legends = TRUE)
We visualize cell type :
plot_list = lapply((c(paste0("RNA_pca_", ndims, "_tsne"),
paste0("RNA_pca_", ndims, "_umap"),
paste0("harmony_", ndims, "_tsne"),
paste0("harmony_", ndims, "_umap"))),
FUN = function(one_red) {
Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = one_red,
cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle(one_red) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
})
patchwork::wrap_plots(plot_list, nrow = 2) +
patchwork::plot_layout(guides = "collect")
We make a representation split by origin to show cell types :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_by = "cell_type",
group_color = color_markers)
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We visualize cell cycle annotation, and BIRC5 and TOP2A expression levels :
plot_list = list()
# Seurat
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "Seurat annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# cyclone
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "cyclone annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# BIRC5
plot_list[[3]] = Seurat::FeaturePlot(sobj, features = "BIRC5",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# TOP2A
plot_list[[4]] = Seurat::FeaturePlot(sobj, features = "TOP2A",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, ncol = 2)
We save the Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/", save_name, "_sobj.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ComplexHeatmap_2.14.0 ggplot2_3.3.5 patchwork_1.1.2
## [4] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] here_1.0.1 TInGa_0.0.0.9000
## [59] ggraph_2.0.3 pkgconfig_2.0.3
## [61] GO.db_3.10.0 DelayedMatrixStats_1.8.0
## [63] gower_0.2.1 ggbeeswarm_0.6.0
## [65] iterators_1.0.12 DropletUtils_1.6.1
## [67] reticulate_1.26 clusterProfiler_3.14.3
## [69] SummarizedExperiment_1.16.1 circlize_0.4.15
## [71] beeswarm_0.4.0 GetoptLong_1.0.5
## [73] xfun_0.35 bslib_0.3.1
## [75] zoo_1.8-10 tidyselect_1.1.0
## [77] reshape2_1.4.4 purrr_0.3.4
## [79] ica_1.0-2 pcaPP_1.9-73
## [81] viridisLite_0.3.0 rtracklayer_1.46.0
## [83] rlang_1.0.2 hexbin_1.28.1
## [85] jquerylib_0.1.4 dyneval_0.9.9
## [87] glue_1.4.2 RColorBrewer_1.1-2
## [89] matrixStats_0.56.0 stringr_1.4.0
## [91] lava_1.6.7 europepmc_0.3
## [93] DESeq2_1.26.0 recipes_0.1.17
## [95] labeling_0.3 harmony_0.1.0
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] latticeExtra_0.6-29 biomaRt_2.42.1
## [281] IRanges_2.20.2 fastmap_1.1.0
## [283] ADGofTest_0.3 copula_1.0-0
## [285] doParallel_1.0.15 AnnotationDbi_1.48.0
## [287] vcd_1.4-8 babelwhale_1.0.1
## [289] openssl_1.4.1 scales_1.1.1
## [291] backports_1.2.1 S4Vectors_0.24.4
## [293] ipred_0.9-12 enrichplot_1.6.1
## [295] hms_1.1.1 ggforce_0.3.1
## [297] Rtsne_0.15 shiny_1.7.1
## [299] numDeriv_2016.8-1.1 polyclip_1.10-0
## [301] lazyeval_0.2.2 Formula_1.2-3
## [303] tsne_0.1-3 crayon_1.3.4
## [305] MASS_7.3-54 pROC_1.16.2
## [307] viridis_0.5.1 dynparam_1.0.0
## [309] rpart_4.1-15 zinbwave_1.8.0
## [311] compiler_3.6.3 ggtext_0.1.0